#include "mcmc_genotyper.hpp"
#include "subgraph.hpp"
#include <algorithm> 
#include <random> 
#include <chrono>
#include <utility>
#include "multipath_alignment.hpp"

// #define stdout_for_performance_script
// #define debug_mcmc
// #define debug_make_snarl_graph
// #define debug_karger_stein
// #define debug_proposal_sample

namespace vg {

    using namespace std;

    MCMCGenotyper::MCMCGenotyper(SnarlManager& snarls, VG& graph, const int n_iterations, const int seed, const int burn_in, const int frequency):snarls(snarls), graph(graph), n_iterations(n_iterations), 
        seed(seed), random_engine(seed), burn_in(burn_in), frequency(frequency){
          
    
    }

    unique_ptr<PhasedGenome> MCMCGenotyper::run_genotype(const vector<multipath_alignment_t>& reads, const double log_base) const{

        // set a flag for invalid contents so a message is observed 
        bool return_optimal = false;

        // generate initial value
        unique_ptr<PhasedGenome> genome = generate_initial_guess();
        
        double max_likelihood = 0.0; 
        double current_likelihood = 0.0; 
        double previous_likelihood = 0.0;
        int haplotype_0 =0;
        int haplotype_1 =1;
        unique_ptr<PhasedGenome> optimal;

        //stores entire set generated by karger-stein min cut    
        unique_ptr<vector<unordered_set<size_t>>> gamma(nullptr);
        //stores the sites we swapped alleles at - returned by alt_proposal
        unique_ptr<unordered_set<size_t>> to_swap_back(nullptr);
        int count =0;
        enum sample_mode {PROPOSAL_ORIGINAL, PROPOSAL_KARGER_STEIN};       
        // build markov chain using Metropolis-Hastings
        for(int i = 0; i< n_iterations; i++){ 
            
            int random_num;

            // holds the previous sample allele
            double x_prev = log_target(genome, reads);

            tuple<int, const Snarl*, vector<NodeTraversal> > to_receive;
            int* modified_haplo;
            const Snarl* modified_site; 
            vector<NodeTraversal>* old_allele;

            if(i < burn_in){
                random_num = PROPOSAL_ORIGINAL;
#ifdef stdout_for_performance_script 
                cerr << "ITERATION " << i <<endl; 
                cerr << "RANDOM NUM " << random_num <<endl;             
#endif
            }else{
                //choose between sample proposals randomly/uniformly
                random_num = generate_discrete_uniform(random_engine, PROPOSAL_ORIGINAL, PROPOSAL_KARGER_STEIN);
                count++;
                //generate gamma after burn in 
                if(i == burn_in){
                    gamma.reset(new vector<unordered_set<size_t>> (karger_stein(reads, *genome))); 
#ifdef stdout_for_performance_script 
                    cerr << "building gamma " <<endl;
                    cerr << "gamma size " << gamma->size() << endl;      
#endif
                }
                //generate gamma with n frequency after burn in 
                if(count == frequency){
                    gamma.reset(new vector<unordered_set<size_t>> (karger_stein(reads, *genome)));
                    count = 0;//reset counter
#ifdef stdout_for_performance_script 
                    cerr << "building gamma " <<endl; 
                    cerr << "gamma size " << gamma->size() << endl;      
#endif
                }
                if(gamma->size() <= 0){
#ifdef stdout_for_performance_script 
                    cerr << "empty gamma "<<endl;        
#endif 
                    break;
                }
#ifdef stdout_for_performance_script 
                cerr << "COUNT " << count <<endl;
                cerr << "ITERATION  " << i <<endl;  
                cerr << "RANDOM NUM " << random_num <<endl;    
#endif

            }

            /*
            *########################################################################################
            *                      SAMPLE
            *########################################################################################
            **/
            if(random_num == PROPOSAL_KARGER_STEIN){
#ifdef stdout_for_performance_script 
                cerr << "karger stein proposal" <<endl;        
#endif
                //use alt proposal sample, store the sites we swapped in case we reject sample
                to_swap_back.reset(new unordered_set<size_t> (alt_proposal_sample(*gamma, *genome)));
                if(!to_swap_back){
#ifdef stdout_for_performance_script 
                    cerr << "to_swap_back() is empty" <<endl;        
#endif
                    break;
                }
                
            }
            if(random_num == PROPOSAL_ORIGINAL){ //otherwise choose original
#ifdef stdout_for_performance_script 
                cerr << "original proposal" <<endl;
                cerr << "phased genome: " <<endl;
                genome->print_phased_genome(); 
                cerr << "modified_haplo before "<< *modified_haplo <<endl;       
#endif
                to_receive = proposal_sample(genome);
                modified_haplo = &get<0>(to_receive); 
                // if the to_receive contents are invalid keep the new allele
                // for graphs that do not contain snarls
                if (*modified_haplo ==-1){
#ifdef stdout_for_performance_script 
                    cerr << "modified haplo is empty" <<endl;        
#endif
                    continue;
                }else{
                    modified_site = get<1>(to_receive); 
                    old_allele = &get<2>(to_receive); 
                }
            }

            /*
            *########################################################################################
            *                      ACCEPT/REJECT SAMPLE 
            *########################################################################################
            **/
            // holds new sample allele score 
            double x_new = log_target(genome, reads);

            double likelihood_ratio = exp(log_base*(x_new - x_prev));
            
            current_likelihood = previous_likelihood + log_base*(x_new-x_prev);
            
            if (current_likelihood > max_likelihood){
                max_likelihood = current_likelihood;
                optimal.reset(new PhasedGenome(*genome));
                return_optimal=true;
            }
            
            // calculate acceptance probability 
            double acceptance_probability = min(1.0, likelihood_ratio);

            // if u~U(0,1) > alpha, discard new allele and keep previous 
            auto uniform_smpl = generate_continuous_uniform(0.0,1.0);
            if(uniform_smpl > acceptance_probability){ //REJECT
                if(random_num==PROPOSAL_ORIGINAL){ //if rand num==0, use proposal sample swap back method
                    //swap back to old allele at random snarl, random haplo
                    genome->set_allele(*modified_site, old_allele->begin(), old_allele->end(), *modified_haplo); 
                }
                if(random_num == PROPOSAL_KARGER_STEIN){ //or if, use alt proposal sample swap back method
                    //swap alleles at all sites in to_swap_back
                    for(auto iter = to_swap_back->begin(); iter != to_swap_back->end(); ++iter){
                        const Snarl* snarl_to_swap = snarls.translate_snarl_num(*iter); 
                        genome->swap_alleles(*snarl_to_swap, haplotype_0, haplotype_1);
                    }
                }
                
#ifdef stdout_for_performance_script
                cerr << "Rejected new allele" <<endl;
                cerr << "clikelihood " << previous_likelihood <<endl;
                // genome->print_phased_genome();
#endif                    
            }else{     
#ifdef stdout_for_performance_script
                cerr << "Accepted new allele" <<endl;
                cerr << "clikelihood " << current_likelihood <<endl;
                // genome->print_phased_genome();
#endif
                previous_likelihood = current_likelihood;   //ACCEPT
            }
        } 
        if(!return_optimal){
            // for graphs without snarls 
            return genome; 
        }else{
#ifdef stdout_for_performance_script 
            cerr <<"clikelihood " << max_likelihood <<endl;
            // optimal->print_phased_genome();
#endif
            return optimal; 
        }

    }   
    double MCMCGenotyper::log_target(unique_ptr<PhasedGenome>& phased_genome, const vector<multipath_alignment_t>& reads)const{
        
        // sum of scores given the reads aligned on the haplotype 
        int32_t sum_scores = 0; 
        
        // get scores for mp alignments 
        for(const multipath_alignment_t& mp : reads){
            sum_scores += phased_genome->optimal_score_on_genome(mp, graph);
            
        } 
        
        return sum_scores;
    }

    tuple<int, const Snarl*, vector<NodeTraversal> > MCMCGenotyper::proposal_sample(unique_ptr<PhasedGenome>& current)const{
        // get a different traversal through the snarl by uniformly choosing from all possible ways to traverse the snarl
        
        // bookkeeping: haplotype ID, snarl* (site that we replaced at), get_allele())
        tuple<int, const Snarl*, vector<NodeTraversal> > to_return;

        int& random_haplotype = get<0>(to_return);
        const Snarl*& random_snarl = get<1>(to_return);
        // the random path through the snarl 
        vector<NodeTraversal>& old_allele = get<2>(to_return);
        
        // sample uniformly between snarls 
        random_snarl = snarls.discrete_uniform_sample(random_engine);
#ifdef debug_proposal_sample
        bool is_null =  (random_snarl== nullptr);
        cerr << "is null " << is_null <<endl;
        cerr << random_snarl <<endl;
#endif
        if(random_snarl == nullptr){
#ifdef debug_proposal_sample
            cerr << "random snarl is null" << endl;
#endif
            random_haplotype = -1;
            return to_return;
        }


        // get list of haplotypes that contain the snarl
        vector<id_t> matched_haplotypes = current->get_haplotypes_with_snarl(random_snarl);


        if(matched_haplotypes.empty()){
#ifdef debug_proposal_sample
            cerr << "looking for snarl starting with " << random_snarl->start() << " and snarl ending with " << random_snarl->end() <<endl;
#endif
            random_haplotype = -1;
            return to_return;
            
        }

        // choose a haplotype uiformly 
        id_t lower_bound = 0;
        id_t upper_bound = matched_haplotypes.size()-1;

        int random_num = generate_discrete_uniform(random_engine, lower_bound, upper_bound);
        random_haplotype = matched_haplotypes[random_num];


        pair<unordered_set<id_t>, unordered_set<edge_t> > contents = snarls.deep_contents(random_snarl, graph, true);

        // unpack the pair, we only care about the node_ids
        unordered_set<id_t>& ids = contents.first;
            
        // enumerate counts through nodes in snarl not the entire graph
        SubHandleGraph subgraph(&graph);
        
        for (id_t id : ids){
            
            // add each node from snarl in super graph to sub graph
            subgraph.add_handle(graph.get_handle(id, false));
        }
        
        
        // create a count_map of the subgraph
        auto count_contents = handlealgs::count_walks_through_nodes(&subgraph);
        
        // unpack the count map from the count_contents 
        unordered_map<handle_t, size_t>& count_map = get<1>(count_contents);

        
        
        // create a topological order of sub graph count map
        vector<handle_t> topological_order = handlealgs::lazier_topological_order(&subgraph);

        //  we want to get just the sink handle handle
        handle_t start = topological_order.back();  
        handle_t source = topological_order.front();
        
        // start at sink in topological
        bool start_from_sink =true;
        bool not_source = true;

        vector<NodeTraversal> allele;
 
        while(not_source){

            size_t  cum_sum = 0;
            vector<size_t> cumulative_sum;
            vector<size_t> paths_to_take;
            size_t  count = 0;
            vector<handle_t> handles;
            
            subgraph.follow_edges(start, start_from_sink, [&](const handle_t& next) { 
                unordered_map<handle_t, size_t>::iterator it;
                it = count_map.find(next); // find the handle
                count = it->second; // get the count 
                cum_sum += count;
                cumulative_sum.push_back(cum_sum); 
                handles.push_back(next); 
        
            });

            // choose a random path uniformly
            int l_bound = 0;
            int u_bound = cumulative_sum.back()-1;
            int random_num = generate_discrete_uniform(random_engine,l_bound, u_bound);

            // use the random_num to select a random_handle
            int found = 0, prev = 0;
            for (int i = 0; i< cumulative_sum.size() ; i++) {
                // check what range the random_num falls in    
                if (prev <= random_num && random_num < cumulative_sum[i] ){
                    found = i; // will correspond to the index of handles
                    break; 
                }
                prev = cumulative_sum[i];
            } 

            assert(found != -1);

            // start_ptr will point to random handle 
            start = handles[found]; 
            
            // save the random path 
            bool position = subgraph.get_is_reverse(start);
            Node* n = graph.get_node(subgraph.get_id(start));


            // allele should not include boundary nodes of random_snarl
            if(n->id() != random_snarl->start().node_id() && n->id() != random_snarl->end().node_id() ){
                allele.push_back(NodeTraversal(n,position));
            }
            
            

            // check if we are at the source, if so we terminate loop
            if(start == source){
                not_source = false;
            }    
            
        }
        // save old allele so we can swap back to it if we need to 
        old_allele = current->get_allele(*random_snarl, random_haplotype);

#ifdef debug_mcmc 
        cerr << "modifying haplotype " << random_num << endl; 
        for(auto iter = allele.begin(); iter != allele.end(); iter++ ){
            cerr << "new allele: " <<"node " << iter->node->id() << " " << iter->node->sequence() <<endl;

        }
        for(auto iter = old_allele.begin(); iter != old_allele.end(); iter++ ){
            cerr << "old allele: " <<"node " << iter->node->id() <<  " " << iter->node->sequence() <<endl;

        }
#endif
        // set new allele with random allele, replace with previous allele 
        current->set_allele(*random_snarl , allele.rbegin(), allele.rend(), random_haplotype);
        
        
        return to_return;

    }
    int MCMCGenotyper::generate_discrete_uniform(minstd_rand0& random_engine, int lower_bound , int upper_bound) const{
        
        // choose a number randomly using discrete uniform distribution
        uniform_int_distribution<int> distribution(lower_bound, upper_bound);  
        int random_num = distribution(random_engine);

        return random_num;
    }
    double MCMCGenotyper::generate_continuous_uniform(const double a, const double b)const{
        
        uniform_real_distribution<double> distribution(a,b);
        double random_num = distribution(random_engine);
        
        return random_num;

    }
    unique_ptr<PhasedGenome> MCMCGenotyper::generate_initial_guess()const{
        
        unique_ptr<PhasedGenome> genome(new PhasedGenome(snarls));
        vector<NodeTraversal> haplotype; //will add twice  

        graph.for_each_path_handle([&](const path_handle_t& path){
        // capture all variables (paths) in scope by reference 

            if(!Paths::is_alt(graph.get_path_name(path))) {
            // If it isn't an alt path, we want to trace it
  
                for (handle_t handle : graph.scan_path(path)) {
                // For each occurrence from start to end
                    
                    // get the node and the node postion and add to the vector 
                    Node* node = graph.get_node(graph.get_id(handle));
                    bool position = graph.get_is_reverse(handle);
                    haplotype.push_back(NodeTraversal(node,position));
  
                }
            }
        });
        // construct haplotypes
        // haplotype1 = haplotype2
        genome->add_haplotype(haplotype.begin(), haplotype.end());
        genome->add_haplotype(haplotype.begin(), haplotype.end());

        // index sites
        genome->build_indices();

        return genome;
    }
    unordered_map<pair<const Snarl*, const Snarl*>, int32_t> MCMCGenotyper::make_snarl_map(const vector<multipath_alignment_t>& reads, PhasedGenome& phased_genome) const{
        
        
        unordered_map<pair<const Snarl*, const Snarl*>, int32_t> map;
        int32_t score_after_swap,score_before_swap,diff_score;

#ifdef debug_make_snarl_graph
        cerr << "******************************************************"<<endl;
        phased_genome.print_phased_genome();
        cerr << "******************************************************"<<endl;
        cerr << "number of reads " << reads.size()<<endl;     
        int count= 0;           
#endif 
        //loop over reads
        for(const multipath_alignment_t& multipath_aln : reads){
            unordered_set<const Snarl*> snarl_set;
#ifdef debug_make_snarl_graph
            vector<size_t> read_nodes; 
#endif
            //for each pair of snarls that touches that read
            for (const auto& subpath : multipath_aln.subpath()) {
                if(subpath.has_path()){
                    auto& path = subpath.path();
                    //for every mapping in the path
                    for(size_t i = 0; i < path.mapping_size(); i++){
                        auto& mapping = path.mapping(i);
                        int64_t node_id = mapping.position().node_id();
                        bool is_reverse = mapping.position().is_reverse();

#ifdef debug_make_snarl_graph
                        read_nodes.push_back(node_id);              
#endif 
                        
                        const Snarl* snarl = snarls.into_which_snarl(node_id, is_reverse);
                        // insert snarls into unordered set with only unique entries
                        //TODO: to make it faster remove the snarls not supported by read at boundary nodes
                        if(snarl){
                            snarl_set.insert(snarl);
                        }
// #ifdef debug_make_snarl_graph
//                             cerr <<"adding fwd_snarl " <<fwd_snarl->start().node_id() <<" -> " <<fwd_snarl->end().node_id() <<endl;
// #endif                        
                             
                    }
                    
                }
                
            }

#ifdef debug_make_snarl_graph
            cerr << "read " << count+1 << ": ";          
            for(size_t nodes: read_nodes){
                cerr << nodes << "->";
            }
            cerr << endl;   
            cerr << "snarl_set size " << snarl_set.size() <<endl; 
                    
#endif 
            vector<pair<const Snarl*, const Snarl*>> pairs;
            vector<const Snarl*> v(snarl_set.begin(), snarl_set.end());
            for(int i =0; i < v.size(); i++){
                for(int j =i+1; j < v.size(); j++){
                    //check if both haplotypes visited these snarls
// #ifdef debug_make_snarl_graph
//                 cerr <<"Searching Pair: " <<v[i]->start().node_id() <<" -> " <<v[i]->end().node_id();
//                 cerr <<" , " <<v[j]->start().node_id() <<" -> " <<v[j]->end().node_id() <<endl;
// #endif 
                    vector<id_t> haplo_ids1 = phased_genome.get_haplotypes_with_snarl(v[i]);
                    vector<id_t> haplo_ids2 = phased_genome.get_haplotypes_with_snarl(v[j]);

// #ifdef debug_make_snarl_graph
//                     if(!haplo_ids1.empty() && !haplo_ids2.empty()){
//                         if((haplo_ids1[0] == 0 && haplo_ids2[0] == 0) && (haplo_ids1[1] == 1 && haplo_ids2[1] == 1)){
//                             cerr << "both haplotypes overlap snarl pair" <<endl; 
//                         }
//                     }else{
//                         cerr << "1 haplotypes or no haplotypes overlap this snarl pair " <<endl;
//                     } 
                                    
// #endif
                    if(!haplo_ids1.empty() && !haplo_ids2.empty() ){
                        if(haplo_ids1.size()==2 && haplo_ids2.size()==2){
                            //make pairs of those snarls that are overlapped by both haplotypes
                            // pair should be ordered with smaller snarl num first
                            if(snarls.snarl_number(v[i]) < snarls.snarl_number(v[j])){
                                pairs.push_back(make_pair(v[i], v[j]));
                            }else{
                                pairs.push_back(make_pair(v[j], v[i]));
                            }
                            
                            
                        }
                    }
                }
            }
#ifdef debug_make_snarl_graph 
            
            cerr << "pairs size " << pairs.size()<<endl;
#endif

            
            //get_optimal_score_on_genome(genome_before_swap, read)
            score_before_swap = phased_genome.optimal_score_on_genome(multipath_aln, graph);
#ifdef debug_make_snarl_graph
            cerr << endl;
            cerr << "score_before_swap " << score_before_swap <<endl;                
#endif           
            for(auto snarl_ptr:pairs){ 
                int haplotype_0 =0;
                int haplotype_1 =1;
                //generate a random uniform number between [0,1]
                int random_num = generate_discrete_uniform(random_engine, haplotype_0, haplotype_1);
// #ifdef debug_make_snarl_graph
//                 cerr << "random_haplo_num " << random_num <<endl; 
                            
// #endif
                //exchange their alleles with each other at one of the snarls (chosen randomly)
                //TODO: for < 2 or > 2 haplotypes that overlap snarl pair , skip snarl pair for that read 
                if(random_num == 0){
                    //dereference the ptr
                    const Snarl& snarl_to_swap = *snarl_ptr.first;
                    //exhange alleles at first snarl in pair
                    phased_genome.swap_alleles(snarl_to_swap, haplotype_0, haplotype_1);
                    // get score after swap
                    score_after_swap = phased_genome.optimal_score_on_genome(multipath_aln, graph);
#ifdef debug_make_snarl_graph
                    // cerr << "genome after swap " << endl;
                    // phased_genome.print_phased_genome();
                    // cerr << "score_after_swap  " << score_after_swap  <<endl;
#endif
                                        

                    //swap back 
                    phased_genome.swap_alleles(snarl_to_swap, haplotype_0, haplotype_1);
                
                }else{
                    //else random num == 1
                    //dereference the ptr
                    const Snarl& snarl_to_swap = *snarl_ptr.second;
                    //exchange alleles at second snarl in pair
                    phased_genome.swap_alleles(snarl_to_swap, haplotype_0, haplotype_1);
                    // get score after swap
                    score_after_swap = phased_genome.optimal_score_on_genome(multipath_aln, graph);
#ifdef debug_make_snarl_graph
                    // cerr << "genome after swap " << endl;
                    // phased_genome.print_phased_genome();
                    // cerr << "score_after_swap  " << score_after_swap  <<endl;                              
#endif
                    //swap back 
                    phased_genome.swap_alleles(snarl_to_swap, haplotype_0, haplotype_1);
                }
            
                //getcalculate difference of scores between swaps
                diff_score = score_before_swap - score_after_swap;
#ifdef debug_make_snarl_graph
                cerr << "diff score " << diff_score <<endl;                
#endif
                map[make_pair(snarl_ptr.first, snarl_ptr.second)] += diff_score;
            }



            
#ifdef debug_make_snarl_graph
        cerr << "******************************************************"<<endl;
        cerr << endl;
        count++;             
#endif 

        }

        
        

#ifdef debug_make_snarl_graph
        cerr << "map size " << map.size() <<endl;                   
#endif            
        return  map;
    }

    algorithms::Graph MCMCGenotyper::make_snarl_graph(unordered_map<pair<const Snarl*, const Snarl*>, int32_t> map) const{
        //TODO: find where the SnarlRecord* are being added to deque and store the index in snarls.cpp
        
        algorithms::Graph snarl_graph;

        for(auto snarl_pair_to_weight: map){
            pair<const Snarl*, const Snarl*> snarl_pair = snarl_pair_to_weight.first;
            const Snarl* snarl_1 = snarl_pair.first;
            const Snarl* snarl_2 = snarl_pair.second;
// #ifdef debug_make_snarl_graph
//             cerr <<"snarl1 start->end" <<snarl_1->start().node_id() <<" -> " <<snarl_1->end().node_id() <<endl;
//             cerr <<"snarl2 start->end" <<snarl_2->start().node_id() <<" -> " <<snarl_2->end().node_id() <<endl;
// #endif
            // skip edge weights that are <1
            if(snarl_pair_to_weight.second < 1){
#ifdef debug_make_snarl_graph
                cerr << "removing edge with weight  " << snarl_pair_to_weight.second <<endl;                   
#endif 
                continue;
            }else{

                size_t edge_weight = (size_t)snarl_pair_to_weight.second;
#ifdef debug_make_snarl_graph
                cerr << "weight > 1 : " << edge_weight <<endl;                   
#endif 
                //node ids, get the ids from SnarlRecord index member 
                size_t snarl_id_1 = snarls.snarl_number(snarl_1);
                size_t snarl_id_2 = snarls.snarl_number(snarl_2);
#ifdef debug_make_snarl_graph
                cerr << "snarl 1 : " << snarl_id_1 <<endl;  
                cerr << "snarl 2 : " << snarl_id_2 <<endl;                   
#endif              
                algorithms::Node snarl_node_1, snarl_node_2;
                algorithms::Edge edge_fwd, edge_back;
                edge_fwd.weight = edge_weight;
                edge_back.weight = edge_weight;
                //add other node that touches edge coming from base node
                edge_fwd.other = snarl_id_2; //snarl_1 -> snarl_2
                edge_back.other = snarl_id_1; //snarl_2 -> snarl_1

#ifdef debug_make_snarl_graph
                cerr << "edge_fwd.other: " << edge_fwd.other <<endl;  
                cerr << "edge_back.other " << edge_back.other <<endl;                   
#endif 
                vector<size_t> node_ids = snarl_graph.get_node_ids();
                unordered_set<size_t> id_set(node_ids.begin(), node_ids.end());

                //if snarl node already exists in graph, we add to its edges vector
                if(id_set.count(snarl_id_1)){
                    snarl_graph.get_node_by_id(snarl_id_1).edges.push_back(edge_fwd);
                    snarl_graph.get_node_by_id(snarl_id_1).weight += edge_weight;
                }else{
                    //else we create a new node and add the node to the graph along with the edge
                    snarl_node_1.weight += edge_weight;
                    snarl_node_1.edges.push_back(edge_fwd);
                    snarl_graph.add_node(snarl_id_1, snarl_node_1);
                }
                
                //similarily for snarl 2, we check if it already exists and if so we add to its edge vector
                if(id_set.count(snarl_id_2)){
                    snarl_graph.get_node_by_id(snarl_id_2).edges.push_back(edge_back);
                    snarl_graph.get_node_by_id(snarl_id_2).weight += edge_weight;
                }else{
                    //else we create a new node and add the node to the graph along with the edge
                    snarl_node_2.edges.push_back(edge_back);
                    snarl_node_2.weight += edge_weight;
                    snarl_graph.add_node(snarl_id_2, snarl_node_2);
                }

            }

        }
        
        return snarl_graph; 

    }

    vector<unordered_set<size_t>> MCMCGenotyper::karger_stein(const vector<multipath_alignment_t>& reads, PhasedGenome& genome) const{

#ifdef debug_karger_stein   
        cerr << "num reads" << reads.size() <<endl;    
        genome.print_phased_genome();
#endif
        //make snarl map
        unordered_map<pair<const Snarl*, const Snarl*>, int32_t> snarl_map  = make_snarl_map(reads, genome);
        
        //make snarl graph
        algorithms::Graph snarl_graph =  make_snarl_graph(snarl_map);
#ifdef debug_karger_stein   
        cerr << "size of snarl map" << snarl_map.size() <<endl;    
        cerr << "size of snarl graph" << snarl_graph.get_size() <<endl;
#endif
        //generate set used to make swap in alt proposal dist
        vector<unordered_set<size_t>> to_recv =  algorithms::min_cut_decomposition(snarl_graph, seed);

#ifdef debug_karger_stein   
        cerr << "min decomposition size of gamma" << to_recv.size() <<endl;    
#endif
        return to_recv;
        
    }

    unordered_set<size_t> MCMCGenotyper::alt_proposal_sample(vector<unordered_set<size_t>>& gamma, PhasedGenome& genome ) const{
        
        int haplotype_0 =0;
        int haplotype_1 =1;
        int lower_bound = 0;
        int upper_bound = gamma.size();

#ifdef karger_stein 
            cerr << "rand num " << random_num<<endl;
            cerr << "upper_bound " << upper_bound <<endl;
                     
#endif
        //generate set used to make swap in alt proposal dist
        int random_num = generate_discrete_uniform(random_engine, lower_bound , upper_bound);

        
        unordered_set<size_t> sites_to_swap = gamma[random_num];

        // swap alleles at sites in chosen set using snarl indexes
        for(auto iter = sites_to_swap.begin(); iter != sites_to_swap.end(); ++iter){
            const Snarl* snarl_to_swap = snarls.translate_snarl_num(*iter); 
            genome.swap_alleles(*snarl_to_swap, haplotype_0, haplotype_1);
        }

        return sites_to_swap;
        
    }

    
    


}



